library(devtools)
library(rbenchmark)
library(microbenchmark)

Ruta de trabajo

# Directory that is scanned for the JSON report files.
path <- "~/clase/proyecto r/ProyectoVirusTotal/Android"

Operación de carga de datos y transformación a Dataset

Tal y como se ha explicado en el anterior capítulo de clasificación, cargamos los datos en una lista con la información de los JSON. Como vamos a trabajar con los permisos, necesitamos que estén presentes en el dataset posteriormente; sin embargo, la función spread_all pone a NULL los elementos que son múltiples, y en este caso cada permiso se compone de 3 elementos. Por eso los tenemos que corregir antes de pasarlos a dataset; una vez hecho eso, volvemos a usar spread_all para tenerlos en un data frame.

# Read every JSON report under `path` in parallel, normalise the permission
# entries and flatten the result into one table with spread_all().

# `pattern` is a regular expression, not a glob, so the ".json" extension is
# matched explicitly and anchored at the end of the file name.
files <- dir(path, pattern = "\\.json$")
json_files <- list.files(path = path, pattern = "\\.json$", full.names = TRUE)

# Leave one core free, but never ask for fewer than one worker (detectCores()
# may return 1 or NA).
cl <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))

json_list <- tryCatch(
  parLapply(
    cl,
    json_files,
    function(x) jsonlite::read_json(path = x, simplifyVector = TRUE)
  ),
  # Release the workers even when one of the files cannot be read.
  finally = stopCluster(cl)
)

# Replace every permission entry with a scalar TRUE so that spread_all() keeps
# it as a column (per the accompanying text, multi-element entries would
# otherwise come out as NULL). seq_along() makes both loops no-ops on empty
# input, where 1:length() would iterate over c(1, 0).
for (j in seq_along(json_list)) {
  permisos <- json_list[[j]][["additional_info"]][["androguard"]][["Permissions"]]
  if (length(permisos) > 0) {
    for (k in seq_along(permisos)) {
      json_list[[j]][["additional_info"]][["androguard"]][["Permissions"]][[k]] <- TRUE
    }
  }
}

json_tabla_permisos_cambiados <- json_list %>%
  spread_all()

Filtrado de columnas previo

Seleccionamos la columna sha256, que es lo que usaremos para identificar cada archivo, aunque, por comodidad a la hora de ver los gráficos posteriores, en las visualizaciones seguiremos identificando cada archivo por su posición. Después seleccionamos todos los permisos y asignamos 0 a todos los NA, ya que en este caso tener un NA implica que el archivo no tiene ese permiso.

# Keep the file identifier, the `total` and `positives` columns and every
# Android permission flag. The dots are escaped so that they match only the
# literal "." separators of the column names instead of any character.
sha_datos_permisos_positives <- as.data.frame(json_tabla_permisos_cambiados) %>%
  plotly::select(
    sha256, total, positives,
    matches("androguard\\.Permissions\\.android\\.permission\\.")
  )
# A missing flag means the file does not have that permission: encode it as 0.
sha_datos_permisos_positives[is.na(sha_datos_permisos_positives)] <- 0

# Strip the common prefix so each column is named after the bare permission.
# fixed = TRUE treats the prefix as literal text rather than as a regex.
colnames(sha_datos_permisos_positives) <- gsub(
  "additional_info.androguard.Permissions.android.permission.",
  "",
  colnames(sha_datos_permisos_positives),
  fixed = TRUE
)

Regresión Lineal

Primero probamos con una regresión lineal, con la que obtenemos un R-squared de 0.6469. Sin embargo, tenemos demasiadas variables en la regresión lineal, así que las vamos a filtrar usando stepAIC; así pasamos de 96 permisos a los 23 permisos más relevantes de cara a que los tests den positivo.

# Full linear model: `positives` against every other column, with `total` and
# `sha256` removed from the predictors.
permisos.regresion <- lm(
  formula = positives ~ . - total - sha256,
  data = sha_datos_permisos_positives
)
# Diagnostic plots of the fitted model.
plot(permisos.regresion)
## Warning: not plotting observations with leverage one:
##   4, 25, 31, 34, 40, 41, 45, 56, 65, 68, 75, 78, 85, 103, 124, 136, 152, 181

# Coefficient table and goodness-of-fit statistics.
summary(permisos.regresion)
## 
## Call:
## lm(formula = positives ~ . - total - sha256, data = sha_datos_permisos_positives)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.109 -1.043  0.000  1.500  5.891 
## 
## Coefficients: (60 not defined because of singularities)
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           22.1087     0.3917  56.438  < 2e-16 ***
## SYSTEM_ALERT_WINDOW                   -0.8869     2.6578  -0.334 0.739099    
## ACCESS_NETWORK_STATE                  -3.7415     4.4829  -0.835 0.405302    
## ACCESS_COARSE_LOCATION                 5.4804     3.5143   1.559 0.121049    
## WAKE_LOCK                            -11.0668     3.6681  -3.017 0.003012 ** 
## INTERNET                               8.6567     4.2323   2.045 0.042610 *  
## WRITE_EXTERNAL_STORAGE                -0.9811     1.4137  -0.694 0.488780    
## RECEIVE_BOOT_COMPLETED                -3.0559     4.2219  -0.724 0.470332    
## QUICKBOOT_POWERON                     15.8335    47.2107   0.335 0.737820    
## ACCESS_WIFI_STATE                      7.3468     3.1571   2.327 0.021336 *  
## GET_TASKS                             12.8226     7.5549   1.697 0.091778 .  
## CHANGE_WIFI_STATE                     -5.2993    20.6905  -0.256 0.798215    
## READ_PHONE_STATE                     -10.0000     4.2848  -2.334 0.020967 *  
## BLUETOOTH                            -12.5924    30.1403  -0.418 0.676713    
## REQUEST_IGNORE_BATTERY_OPTIMIZATIONS  11.6521     8.7418   1.333 0.184636    
## REQUEST_DELETE_PACKAGES              -18.3770    30.1950  -0.609 0.543728    
## QUERY_ALL_PACKAGES                    19.8424    59.6554   0.333 0.739900    
## SEND_SMS                              14.9571     4.1688   3.588 0.000454 ***
## KILL_BACKGROUND_PROCESSES            -19.5711    19.4014  -1.009 0.314765    
## WRITE_SMS                            -13.5012    51.8686  -0.260 0.795003    
## CALL_PHONE                             3.0826     5.3819   0.573 0.567682    
## VIBRATE                                2.6961     4.5887   0.588 0.557744    
## RECEIVE_SMS                           15.7169    31.5914   0.498 0.619580    
## READ_CONTACTS                         -4.2612    28.1387  -0.151 0.879841    
## FOREGROUND_SERVICE                    -3.2051     5.1995  -0.616 0.538571    
## READ_SMS                               3.0112    27.2689   0.110 0.912223    
## READ_PHONE_NUMBERS                    -6.2717    20.8237  -0.301 0.763705    
## GET_ACCOUNTS                          39.7731    36.5563   1.088 0.278389    
## AUTHENTICATE_ACCOUNTS                -77.1361    18.2014  -4.238 3.98e-05 ***
## RECEIVE_MMS                           37.4320    31.6962   1.181 0.239538    
## REORDER_TASKS                        -12.3745    20.5625  -0.602 0.548240    
## WRITE_SYNC_SETTINGS                        NA         NA      NA       NA    
## USE_FULL_SCREEN_INTENT                30.6103    13.1541   2.327 0.021337 *  
## CHANGE_NETWORK_STATE                   3.9825    41.8555   0.095 0.924327    
## ACCESS_COARSE_UPDATES                      NA         NA      NA       NA    
## PROCESS_OUTGOING_CALLS                     NA         NA      NA       NA    
## BLUETOOTH_ADMIN                            NA         NA      NA       NA    
## ACCESS_FINE_LOCATION                 -11.9546     5.4484  -2.194 0.029805 *  
## BOOT_COMPLETED                             NA         NA      NA       NA    
## REQUEST_INSTALL_PACKAGES             -16.2227     9.2944  -1.745 0.083014 .  
## RECEIVE_USER_PRESENT                       NA         NA      NA       NA    
## ACCESS_LOCATION_EXTRA_COMMANDS             NA         NA      NA       NA    
## WRITE_CALL_LOG                             NA         NA      NA       NA    
## READ_CALL_LOG                              NA         NA      NA       NA    
## ACCESS_BACKGROUND_LOCATION                 NA         NA      NA       NA    
## WRITE_CONTACTS                             NA         NA      NA       NA    
## READ_EXTERNAL_STORAGE                 -1.7843     2.5368  -0.703 0.482939    
## BROADCAST_STICKY                           NA         NA      NA       NA    
## MODIFY_AUDIO_SETTINGS                      NA         NA      NA       NA    
## SYSTEM_OVERLAY_WINDOW                 23.4279     7.8390   2.989 0.003289 ** 
## RECORD_AUDIO                               NA         NA      NA       NA    
## MOUNT_UNMOUNT_FILESYSTEMS                  NA         NA      NA       NA    
## ANSWER_PHONE_CALLS                         NA         NA      NA       NA    
## POWER_SERVICE                              NA         NA      NA       NA    
## DISABLE_KEYGUARD                           NA         NA      NA       NA    
## ACCESS_MOCK_LOCATION                       NA         NA      NA       NA    
## READ_USER_DICTIONARY                       NA         NA      NA       NA    
## INTERACT_ACROSS_USERS_FULL                 NA         NA      NA       NA    
## CHANGE_WIFI_MULTICAST_STATE                NA         NA      NA       NA    
## READ_INTERNAL_STORAGE                      NA         NA      NA       NA    
## DOWNLOAD_WITHOUT_NOTIFICATION              NA         NA      NA       NA    
## ACCESS_CACHE_FILESYSTEM                    NA         NA      NA       NA    
## ACCESS_MTK_MMHW                            NA         NA      NA       NA    
## DIAGNOSTIC                                 NA         NA      NA       NA    
## WRITE_SETTINGS                             NA         NA      NA       NA    
## SAMSUNG_TUNTAP                             NA         NA      NA       NA    
## WRITE_SECURE_SETTINGS                      NA         NA      NA       NA    
## PACKAGE_USAGE_STATS                        NA         NA      NA       NA    
## WRITE_INTERNAL_STORAGE                     NA         NA      NA       NA    
## READ_LOGS                                  NA         NA      NA       NA    
## READ_PRIVILEGED_PHONE_STATE                NA         NA      NA       NA    
## READ_CALENDAR                              NA         NA      NA       NA    
## WRITE_CALENDAR                             NA         NA      NA       NA    
## INJECT_EVENTS                              NA         NA      NA       NA    
## ACCESS_SUPERUSER                           NA         NA      NA       NA    
## ACCESS_NOTIFICATION_POLICY                 NA         NA      NA       NA    
## CAMERA                                     NA         NA      NA       NA    
## USER_PRESENT                               NA         NA      NA       NA    
## SET_WALLPAPER                              NA         NA      NA       NA    
## INSTALL_PACKAGES                           NA         NA      NA       NA    
## DELETE_PACKAGES                            NA         NA      NA       NA    
## RESTART_PACKAGES                           NA         NA      NA       NA    
## CHANGE_CONFIGURATION                       NA         NA      NA       NA    
## SET_WALLPAPER_HINTS                        NA         NA      NA       NA    
## BROADCAST_PACKAGE_CHANGED                  NA         NA      NA       NA    
## BROADCAST_PACKAGE_REPLACED                 NA         NA      NA       NA    
## BROADCAST_PACKAGE_INSTALL                  NA         NA      NA       NA    
## FLASHLIGHT                                 NA         NA      NA       NA    
## BROADCAST_PACKAGE_ADDED                    NA         NA      NA       NA    
## READ_APP_BADGE                             NA         NA      NA       NA    
## USES_POLICY_WIPE_DATA                      NA         NA      NA       NA    
## BIND_DEVICE_ADMIN                          NA         NA      NA       NA    
## SDCARD_WRITE                               NA         NA      NA       NA    
## START_ACTIVITIES_FROM_BACKGROUND           NA         NA      NA       NA    
## MANAGE_OWN_CALLS                           NA         NA      NA       NA    
## MANAGE_EXTERNAL_STORAGE                    NA         NA      NA       NA    
## BIND_CALL_REDIRECTION_SERVICE              NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.03 on 146 degrees of freedom
## Multiple R-squared:  0.6469, Adjusted R-squared:  0.5598 
## F-statistic: 7.429 on 36 and 146 DF,  p-value: < 2.2e-16

Ejecuto esta función por separado para poder evitar mostrar la salida

# Stepwise term selection by AIC, in both directions, starting from the full
# linear model.
permisos.regresion_filtrados <- stepAIC(
  permisos.regresion,
  direction = "both"
)
# Print the path of removed/added terms together with the AIC after each step.
permisos.regresion_filtrados[["anova"]]
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## positives ~ (sha256 + total + SYSTEM_ALERT_WINDOW + ACCESS_NETWORK_STATE + 
##     ACCESS_COARSE_LOCATION + WAKE_LOCK + INTERNET + WRITE_EXTERNAL_STORAGE + 
##     RECEIVE_BOOT_COMPLETED + QUICKBOOT_POWERON + ACCESS_WIFI_STATE + 
##     GET_TASKS + CHANGE_WIFI_STATE + READ_PHONE_STATE + BLUETOOTH + 
##     REQUEST_IGNORE_BATTERY_OPTIMIZATIONS + REQUEST_DELETE_PACKAGES + 
##     QUERY_ALL_PACKAGES + SEND_SMS + KILL_BACKGROUND_PROCESSES + 
##     WRITE_SMS + CALL_PHONE + VIBRATE + RECEIVE_SMS + READ_CONTACTS + 
##     FOREGROUND_SERVICE + READ_SMS + READ_PHONE_NUMBERS + GET_ACCOUNTS + 
##     AUTHENTICATE_ACCOUNTS + RECEIVE_MMS + REORDER_TASKS + WRITE_SYNC_SETTINGS + 
##     USE_FULL_SCREEN_INTENT + CHANGE_NETWORK_STATE + ACCESS_COARSE_UPDATES + 
##     PROCESS_OUTGOING_CALLS + BLUETOOTH_ADMIN + ACCESS_FINE_LOCATION + 
##     BOOT_COMPLETED + REQUEST_INSTALL_PACKAGES + RECEIVE_USER_PRESENT + 
##     ACCESS_LOCATION_EXTRA_COMMANDS + WRITE_CALL_LOG + READ_CALL_LOG + 
##     ACCESS_BACKGROUND_LOCATION + WRITE_CONTACTS + READ_EXTERNAL_STORAGE + 
##     BROADCAST_STICKY + MODIFY_AUDIO_SETTINGS + SYSTEM_OVERLAY_WINDOW + 
##     RECORD_AUDIO + MOUNT_UNMOUNT_FILESYSTEMS + ANSWER_PHONE_CALLS + 
##     POWER_SERVICE + DISABLE_KEYGUARD + ACCESS_MOCK_LOCATION + 
##     READ_USER_DICTIONARY + INTERACT_ACROSS_USERS_FULL + CHANGE_WIFI_MULTICAST_STATE + 
##     READ_INTERNAL_STORAGE + DOWNLOAD_WITHOUT_NOTIFICATION + ACCESS_CACHE_FILESYSTEM + 
##     ACCESS_MTK_MMHW + DIAGNOSTIC + WRITE_SETTINGS + SAMSUNG_TUNTAP + 
##     WRITE_SECURE_SETTINGS + PACKAGE_USAGE_STATS + WRITE_INTERNAL_STORAGE + 
##     READ_LOGS + READ_PRIVILEGED_PHONE_STATE + READ_CALENDAR + 
##     WRITE_CALENDAR + INJECT_EVENTS + ACCESS_SUPERUSER + ACCESS_NOTIFICATION_POLICY + 
##     CAMERA + USER_PRESENT + SET_WALLPAPER + INSTALL_PACKAGES + 
##     DELETE_PACKAGES + RESTART_PACKAGES + CHANGE_CONFIGURATION + 
##     SET_WALLPAPER_HINTS + BROADCAST_PACKAGE_CHANGED + BROADCAST_PACKAGE_REPLACED + 
##     BROADCAST_PACKAGE_INSTALL + FLASHLIGHT + BROADCAST_PACKAGE_ADDED + 
##     READ_APP_BADGE + USES_POLICY_WIPE_DATA + BIND_DEVICE_ADMIN + 
##     SDCARD_WRITE + START_ACTIVITIES_FROM_BACKGROUND + MANAGE_OWN_CALLS + 
##     MANAGE_EXTERNAL_STORAGE + BIND_CALL_REDIRECTION_SERVICE) - 
##     total - sha256
## 
## Final Model:
## positives ~ WAKE_LOCK + INTERNET + QUICKBOOT_POWERON + ACCESS_WIFI_STATE + 
##     GET_TASKS + READ_PHONE_STATE + BLUETOOTH + QUERY_ALL_PACKAGES + 
##     SEND_SMS + KILL_BACKGROUND_PROCESSES + WRITE_SMS + RECEIVE_SMS + 
##     FOREGROUND_SERVICE + GET_ACCOUNTS + AUTHENTICATE_ACCOUNTS + 
##     RECEIVE_MMS + REORDER_TASKS + USE_FULL_SCREEN_INTENT + ACCESS_FINE_LOCATION + 
##     REQUEST_INSTALL_PACKAGES + SYSTEM_OVERLAY_WINDOW + READ_CALENDAR + 
##     WRITE_CONTACTS
## 
## 
##                                      Step Df    Deviance Resid. Df Resid. Dev
## 1                                                              146   1340.216
## 2         - BIND_CALL_REDIRECTION_SERVICE  0  0.00000000       146   1340.216
## 3               - MANAGE_EXTERNAL_STORAGE  0  0.00000000       146   1340.216
## 4                      - MANAGE_OWN_CALLS  0  0.00000000       146   1340.216
## 5      - START_ACTIVITIES_FROM_BACKGROUND  0  0.00000000       146   1340.216
## 6                          - SDCARD_WRITE  0  0.00000000       146   1340.216
## 7                     - BIND_DEVICE_ADMIN  0  0.00000000       146   1340.216
## 8                 - USES_POLICY_WIPE_DATA  0  0.00000000       146   1340.216
## 9                        - READ_APP_BADGE  0  0.00000000       146   1340.216
## 10              - BROADCAST_PACKAGE_ADDED  0  0.00000000       146   1340.216
## 11                           - FLASHLIGHT  0  0.00000000       146   1340.216
## 12            - BROADCAST_PACKAGE_INSTALL  0  0.00000000       146   1340.216
## 13           - BROADCAST_PACKAGE_REPLACED  0  0.00000000       146   1340.216
## 14            - BROADCAST_PACKAGE_CHANGED  0  0.00000000       146   1340.216
## 15                  - SET_WALLPAPER_HINTS  0  0.00000000       146   1340.216
## 16                 - CHANGE_CONFIGURATION  0  0.00000000       146   1340.216
## 17                     - RESTART_PACKAGES  0  0.00000000       146   1340.216
## 18                      - DELETE_PACKAGES  0  0.00000000       146   1340.216
## 19                     - INSTALL_PACKAGES  0  0.00000000       146   1340.216
## 20                        - SET_WALLPAPER  0  0.00000000       146   1340.216
## 21                         - USER_PRESENT  0  0.00000000       146   1340.216
## 22                               - CAMERA  0  0.00000000       146   1340.216
## 23           - ACCESS_NOTIFICATION_POLICY  0  0.00000000       146   1340.216
## 24                     - ACCESS_SUPERUSER  0  0.00000000       146   1340.216
## 25                        - INJECT_EVENTS  0  0.00000000       146   1340.216
## 26                       - WRITE_CALENDAR  0  0.00000000       146   1340.216
## 27                        - READ_CALENDAR  0  0.00000000       146   1340.216
## 28          - READ_PRIVILEGED_PHONE_STATE  0  0.00000000       146   1340.216
## 29                            - READ_LOGS  0  0.00000000       146   1340.216
## 30               - WRITE_INTERNAL_STORAGE  0  0.00000000       146   1340.216
## 31                  - PACKAGE_USAGE_STATS  0  0.00000000       146   1340.216
## 32                - WRITE_SECURE_SETTINGS  0  0.00000000       146   1340.216
## 33                       - SAMSUNG_TUNTAP  0  0.00000000       146   1340.216
## 34                       - WRITE_SETTINGS  0  0.00000000       146   1340.216
## 35                           - DIAGNOSTIC  0  0.00000000       146   1340.216
## 36                      - ACCESS_MTK_MMHW  0  0.00000000       146   1340.216
## 37              - ACCESS_CACHE_FILESYSTEM  0  0.00000000       146   1340.216
## 38        - DOWNLOAD_WITHOUT_NOTIFICATION  0  0.00000000       146   1340.216
## 39                - READ_INTERNAL_STORAGE  0  0.00000000       146   1340.216
## 40          - CHANGE_WIFI_MULTICAST_STATE  0  0.00000000       146   1340.216
## 41           - INTERACT_ACROSS_USERS_FULL  0  0.00000000       146   1340.216
## 42                 - READ_USER_DICTIONARY  0  0.00000000       146   1340.216
## 43                 - ACCESS_MOCK_LOCATION  0  0.00000000       146   1340.216
## 44                     - DISABLE_KEYGUARD  0  0.00000000       146   1340.216
## 45                        - POWER_SERVICE  0  0.00000000       146   1340.216
## 46                   - ANSWER_PHONE_CALLS  0  0.00000000       146   1340.216
## 47            - MOUNT_UNMOUNT_FILESYSTEMS  0  0.00000000       146   1340.216
## 48                         - RECORD_AUDIO  0  0.00000000       146   1340.216
## 49                - MODIFY_AUDIO_SETTINGS  0  0.00000000       146   1340.216
## 50                     - BROADCAST_STICKY  0  0.00000000       146   1340.216
## 51                       - WRITE_CONTACTS  0  0.00000000       146   1340.216
## 52           - ACCESS_BACKGROUND_LOCATION  0  0.00000000       146   1340.216
## 53                        - READ_CALL_LOG  0  0.00000000       146   1340.216
## 54                       - WRITE_CALL_LOG  0  0.00000000       146   1340.216
## 55       - ACCESS_LOCATION_EXTRA_COMMANDS  0  0.00000000       146   1340.216
## 56                 - RECEIVE_USER_PRESENT  0  0.00000000       146   1340.216
## 57                       - BOOT_COMPLETED  0  0.00000000       146   1340.216
## 58                      - BLUETOOTH_ADMIN  0  0.00000000       146   1340.216
## 59               - PROCESS_OUTGOING_CALLS  0  0.00000000       146   1340.216
## 60                - ACCESS_COARSE_UPDATES  0  0.00000000       146   1340.216
## 61                  - WRITE_SYNC_SETTINGS  0  0.00000000       146   1340.216
## 62                 - CHANGE_NETWORK_STATE  1  0.08310525       147   1340.299
## 63                             - READ_SMS  1  0.03843852       148   1340.337
## 64                        - READ_CONTACTS  1  0.58145426       149   1340.919
## 65                  - SYSTEM_ALERT_WINDOW  1  1.29585809       150   1342.214
## 66                   - READ_PHONE_NUMBERS  1  1.18654652       151   1343.401
## 67                           - CALL_PHONE  1  4.30800958       152   1347.709
## 68               - WRITE_EXTERNAL_STORAGE  1  7.72828134       153   1355.437
## 69                    - CHANGE_WIFI_STATE  1  8.87030329       154   1364.308
## 70                              - VIBRATE  1  3.31596078       155   1367.624
## 71               - ACCESS_COARSE_LOCATION  1 13.95012507       156   1381.574
## 72                 - ACCESS_NETWORK_STATE  1 10.84410359       157   1392.418
## 73               - RECEIVE_BOOT_COMPLETED  1 13.72871214       158   1406.147
## 74                        + READ_CALENDAR  1 17.24512352       157   1388.901
## 75 - REQUEST_IGNORE_BATTERY_OPTIMIZATIONS  1  9.13624727       158   1398.038
## 76                - READ_EXTERNAL_STORAGE  1  7.98060122       159   1406.018
## 77                       + WRITE_CONTACTS  1 15.66279105       158   1390.355
## 78              - REQUEST_DELETE_PACKAGES  1  0.23330862       159   1390.589
##         AIC
## 1  438.3712
## 2  438.3712
## 3  438.3712
## 4  438.3712
## 5  438.3712
## 6  438.3712
## 7  438.3712
## 8  438.3712
## 9  438.3712
## 10 438.3712
## 11 438.3712
## 12 438.3712
## 13 438.3712
## 14 438.3712
## 15 438.3712
## 16 438.3712
## 17 438.3712
## 18 438.3712
## 19 438.3712
## 20 438.3712
## 21 438.3712
## 22 438.3712
## 23 438.3712
## 24 438.3712
## 25 438.3712
## 26 438.3712
## 27 438.3712
## 28 438.3712
## 29 438.3712
## 30 438.3712
## 31 438.3712
## 32 438.3712
## 33 438.3712
## 34 438.3712
## 35 438.3712
## 36 438.3712
## 37 438.3712
## 38 438.3712
## 39 438.3712
## 40 438.3712
## 41 438.3712
## 42 438.3712
## 43 438.3712
## 44 438.3712
## 45 438.3712
## 46 438.3712
## 47 438.3712
## 48 438.3712
## 49 438.3712
## 50 438.3712
## 51 438.3712
## 52 438.3712
## 53 438.3712
## 54 438.3712
## 55 438.3712
## 56 438.3712
## 57 438.3712
## 58 438.3712
## 59 438.3712
## 60 438.3712
## 61 438.3712
## 62 436.3826
## 63 434.3878
## 64 432.4672
## 65 430.6440
## 66 428.8057
## 67 427.3916
## 68 426.4380
## 69 425.6317
## 70 424.0759
## 71 423.9331
## 72 423.3639
## 73 423.1593
## 74 422.9011
## 75 422.1010
## 76 421.1427
## 77 421.0926
## 78 419.1233
# F-test comparing the full model against the stepwise-reduced model.
anova(permisos.regresion, permisos.regresion_filtrados)
## Analysis of Variance Table
## 
## Model 1: positives ~ (sha256 + total + SYSTEM_ALERT_WINDOW + ACCESS_NETWORK_STATE + 
##     ACCESS_COARSE_LOCATION + WAKE_LOCK + INTERNET + WRITE_EXTERNAL_STORAGE + 
##     RECEIVE_BOOT_COMPLETED + QUICKBOOT_POWERON + ACCESS_WIFI_STATE + 
##     GET_TASKS + CHANGE_WIFI_STATE + READ_PHONE_STATE + BLUETOOTH + 
##     REQUEST_IGNORE_BATTERY_OPTIMIZATIONS + REQUEST_DELETE_PACKAGES + 
##     QUERY_ALL_PACKAGES + SEND_SMS + KILL_BACKGROUND_PROCESSES + 
##     WRITE_SMS + CALL_PHONE + VIBRATE + RECEIVE_SMS + READ_CONTACTS + 
##     FOREGROUND_SERVICE + READ_SMS + READ_PHONE_NUMBERS + GET_ACCOUNTS + 
##     AUTHENTICATE_ACCOUNTS + RECEIVE_MMS + REORDER_TASKS + WRITE_SYNC_SETTINGS + 
##     USE_FULL_SCREEN_INTENT + CHANGE_NETWORK_STATE + ACCESS_COARSE_UPDATES + 
##     PROCESS_OUTGOING_CALLS + BLUETOOTH_ADMIN + ACCESS_FINE_LOCATION + 
##     BOOT_COMPLETED + REQUEST_INSTALL_PACKAGES + RECEIVE_USER_PRESENT + 
##     ACCESS_LOCATION_EXTRA_COMMANDS + WRITE_CALL_LOG + READ_CALL_LOG + 
##     ACCESS_BACKGROUND_LOCATION + WRITE_CONTACTS + READ_EXTERNAL_STORAGE + 
##     BROADCAST_STICKY + MODIFY_AUDIO_SETTINGS + SYSTEM_OVERLAY_WINDOW + 
##     RECORD_AUDIO + MOUNT_UNMOUNT_FILESYSTEMS + ANSWER_PHONE_CALLS + 
##     POWER_SERVICE + DISABLE_KEYGUARD + ACCESS_MOCK_LOCATION + 
##     READ_USER_DICTIONARY + INTERACT_ACROSS_USERS_FULL + CHANGE_WIFI_MULTICAST_STATE + 
##     READ_INTERNAL_STORAGE + DOWNLOAD_WITHOUT_NOTIFICATION + ACCESS_CACHE_FILESYSTEM + 
##     ACCESS_MTK_MMHW + DIAGNOSTIC + WRITE_SETTINGS + SAMSUNG_TUNTAP + 
##     WRITE_SECURE_SETTINGS + PACKAGE_USAGE_STATS + WRITE_INTERNAL_STORAGE + 
##     READ_LOGS + READ_PRIVILEGED_PHONE_STATE + READ_CALENDAR + 
##     WRITE_CALENDAR + INJECT_EVENTS + ACCESS_SUPERUSER + ACCESS_NOTIFICATION_POLICY + 
##     CAMERA + USER_PRESENT + SET_WALLPAPER + INSTALL_PACKAGES + 
##     DELETE_PACKAGES + RESTART_PACKAGES + CHANGE_CONFIGURATION + 
##     SET_WALLPAPER_HINTS + BROADCAST_PACKAGE_CHANGED + BROADCAST_PACKAGE_REPLACED + 
##     BROADCAST_PACKAGE_INSTALL + FLASHLIGHT + BROADCAST_PACKAGE_ADDED + 
##     READ_APP_BADGE + USES_POLICY_WIPE_DATA + BIND_DEVICE_ADMIN + 
##     SDCARD_WRITE + START_ACTIVITIES_FROM_BACKGROUND + MANAGE_OWN_CALLS + 
##     MANAGE_EXTERNAL_STORAGE + BIND_CALL_REDIRECTION_SERVICE) - 
##     total - sha256
## Model 2: positives ~ WAKE_LOCK + INTERNET + QUICKBOOT_POWERON + ACCESS_WIFI_STATE + 
##     GET_TASKS + READ_PHONE_STATE + BLUETOOTH + QUERY_ALL_PACKAGES + 
##     SEND_SMS + KILL_BACKGROUND_PROCESSES + WRITE_SMS + RECEIVE_SMS + 
##     FOREGROUND_SERVICE + GET_ACCOUNTS + AUTHENTICATE_ACCOUNTS + 
##     RECEIVE_MMS + REORDER_TASKS + USE_FULL_SCREEN_INTENT + ACCESS_FINE_LOCATION + 
##     REQUEST_INSTALL_PACKAGES + SYSTEM_OVERLAY_WINDOW + READ_CALENDAR + 
##     WRITE_CONTACTS
##   Res.Df    RSS  Df Sum of Sq      F Pr(>F)
## 1    146 1340.2                            
## 2    159 1390.6 -13   -50.373 0.4221 0.9599
# Coefficient table and goodness-of-fit statistics of the reduced model.
summary(permisos.regresion_filtrados)
## 
## Call:
## lm(formula = positives ~ WAKE_LOCK + INTERNET + QUICKBOOT_POWERON + 
##     ACCESS_WIFI_STATE + GET_TASKS + READ_PHONE_STATE + BLUETOOTH + 
##     QUERY_ALL_PACKAGES + SEND_SMS + KILL_BACKGROUND_PROCESSES + 
##     WRITE_SMS + RECEIVE_SMS + FOREGROUND_SERVICE + GET_ACCOUNTS + 
##     AUTHENTICATE_ACCOUNTS + RECEIVE_MMS + REORDER_TASKS + USE_FULL_SCREEN_INTENT + 
##     ACCESS_FINE_LOCATION + REQUEST_INSTALL_PACKAGES + SYSTEM_OVERLAY_WINDOW + 
##     READ_CALENDAR + WRITE_CONTACTS, data = sha_datos_permisos_positives)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0328 -1.1113  0.1194  1.4543  5.9672 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                22.0328     0.3786  58.188  < 2e-16 ***
## WAKE_LOCK                  -6.6217     1.4399  -4.599 8.63e-06 ***
## INTERNET                    3.9830     1.4177   2.810 0.005584 ** 
## QUICKBOOT_POWERON           6.2217     2.3721   2.623 0.009568 ** 
## ACCESS_WIFI_STATE           6.4581     1.5943   4.051 7.97e-05 ***
## GET_TASKS                   8.5644     2.1436   3.995 9.86e-05 ***
## READ_PHONE_STATE           -8.9045     1.8010  -4.944 1.92e-06 ***
## BLUETOOTH                 -11.7024     2.2404  -5.223 5.44e-07 ***
## QUERY_ALL_PACKAGES         16.6052     5.4534   3.045 0.002725 ** 
## SEND_SMS                   13.7427     2.4937   5.511 1.41e-07 ***
## KILL_BACKGROUND_PROCESSES -12.0794     4.2082  -2.870 0.004657 ** 
## WRITE_SMS                 -19.5729     4.6649  -4.196 4.51e-05 ***
## RECEIVE_SMS                16.4283     3.1045   5.292 3.96e-07 ***
## FOREGROUND_SERVICE         -7.8335     1.6325  -4.799 3.66e-06 ***
## GET_ACCOUNTS               34.9528     5.8646   5.960 1.57e-08 ***
## AUTHENTICATE_ACCOUNTS     -74.0444    10.5581  -7.013 6.31e-11 ***
## RECEIVE_MMS                25.6096     4.2729   5.993 1.33e-08 ***
## REORDER_TASKS              -8.7765     4.2649  -2.058 0.041237 *  
## USE_FULL_SCREEN_INTENT     25.5969     4.8965   5.228 5.33e-07 ***
## ACCESS_FINE_LOCATION       -9.9183     2.2945  -4.323 2.71e-05 ***
## REQUEST_INSTALL_PACKAGES   -9.6973     2.8780  -3.369 0.000945 ***
## SYSTEM_OVERLAY_WINDOW      21.6861     4.2880   5.057 1.16e-06 ***
## READ_CALENDAR              11.6998     2.8949   4.042 8.25e-05 ***
## WRITE_CONTACTS             -7.8915     3.8798  -2.034 0.043615 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.957 on 159 degrees of freedom
## Multiple R-squared:  0.6336, Adjusted R-squared:  0.5806 
## F-statistic: 11.95 on 23 and 159 DF,  p-value: < 2.2e-16

Reglas de asociación

En un dataset como éste, donde queremos predecir si un archivo será positivo en función de sus permisos, la mejor opción sería aplicar reglas de asociación. Sin embargo, no tengo suficiente memoria RAM en el ordenador: ni siquiera con minlen=2 y support=0.8 deja de fallar.

# Mine association rules over the permission flags only.
#
# The permission columns are turned into a logical presence matrix, so that
# only permissions a file actually has become items of its transaction. The raw
# data frame also carried the sha256 identifier and the two count columns, and
# encoded absent permissions as 0 instead of leaving them out.
# NOTE(review): that input is presumably what exhausted memory - confirm on
# the full data set.
test_apriori <- apriori(
  as.matrix(
    sha_datos_permisos_positives[
      ,
      setdiff(
        colnames(sha_datos_permisos_positives),
        c("sha256", "total", "positives")
      ),
      drop = FALSE
    ]
  ) > 0,
  parameter = list(minlen = 2, support = 0.8)
)

Funciones de visualización fcaR

# Interactive grey-scale heatmap of the transposed `I` element of `fca`.
#
# fca: object whose `I` element can be transposed and converted to a matrix
#      (presumably an fcaR FormalContext - confirm against the callers).
# Returns the plotly object; both axes use strict number auto-typing, the x
# axis is categorical and the y axis has a tick on every unit.
plot_interactivo <- function(fca) {
  incidencia <- as.matrix(t(fca[["I"]]))
  eje_x <- list(autotypenumbers = "strict", type = "category")
  eje_y <- list(autotypenumbers = "strict", dtick = 1)
  mapa <- plot_ly(
    z = incidencia,
    data = as.data.frame(incidencia),
    type = "heatmap",
    colors = "Greys",
    x = colnames(incidencia),
    y = rownames(incidencia)
  )
  layout(mapa, xaxis = eje_x, yaxis = eje_y)
}
# Static white/black heatmap (drawn with heatmap()) of the transposed `I`
# element of `fca`.
#
# fca: object whose `I` element can be transposed and converted to a matrix
#      (presumably an fcaR FormalContext - confirm against the callers).
# Returns whatever heatmap() returns.
plot_dendograma <- function(fca) {
  incidencia <- t(fca[["I"]])
  heatmap(as.matrix(incidencia), col = c("White", "Black"))
}
# Interactive counterpart of the static heatmap: passes the transposed `I`
# element of `fca` to heatmaply() with a white/black colour vector.
#
# fca: object whose `I` element can be transposed and converted to a matrix
#      (presumably an fcaR FormalContext - confirm against the callers).
# Returns whatever heatmaply() returns.
plot_dendograma_interactico <- function(fca) {
  incidencia <- t(fca[["I"]])
  heatmaply(as.matrix(incidencia), col = c("White", "Black"))
}

Formal Concepts Analysis

Finalmente hacemos un FCA para ver cómo se agrupan los permisos, por si algunos son más comunes que otros. Hay 46 grupos de permisos que son irreducibles y 31 grupos de archivos distintos.

# Build the formal context from the permission columns only. They are selected
# by name (everything except the identifier and the two count columns) rather
# than through a fixed 4:99 range, so the code keeps working when the number of
# permissions in the data changes.
columnas_permisos <- setdiff(
  colnames(sha_datos_permisos_positives),
  c("sha256", "total", "positives")
)
fc_permisos <- FormalContext$new(
  sha_datos_permisos_positives[, columnas_permisos, drop = FALSE]
)
# Simplify the context, then compute its concepts and implications.
fc_permisos$clarify()
fc_permisos$reduce()
fc_permisos$find_concepts()
fc_permisos$find_implications()
# Result is not assigned, so it is auto-printed at top level.
fc_permisos$standardize()
## FormalContext with 31 objects and 46 attributes.
##      M1  M2  M3  M4  M5  M6  M7  M8  M9  M10  M11  M12  M13  M14  M15  M16  M17  M18  M19  M20  M21  
##   J1  X   X   X   X   X   X                                                                          
##   J2  X       X   X           X                                              X                       
##   J3  X       X               X   X                                               X                  
##   J4          X   X   X   X   X   X       X    X    X    X    X    X              X    X    X        
##   J5  X   X   X               X       X                                                          X   
##   J6      X   X   X           X                     X                                                
##   J7      X   X   X   X   X   X           X    X                                                     
##   J8  X   X   X   X           X   X                                          X                       
##   J9      X   X   X   X   X   X   X       X    X    X    X    X    X              X    X    X        
##  J10          X   X   X   X   X   X   X   X    X    X         X    X    X                            
## Other attributes are: M22, M23, M24, M25, M26, M27, ...
# Plot the concepts computed for the formal context.
fc_permisos$concepts$plot()

# Interactive heatmap of the context; the bare name on the next line relies on
# top-level auto-printing to display it.
mapa_calor <- plot_interactivo(fc_permisos)
mapa_calor
# Static heatmap of the same context.
plot_dendograma(fc_permisos )

Árbol de decisión

Ahora realizamos un árbol de decisión con un porcentaje del 80% de los datos para entrenamiento y 20% para prueba.

# 80/20 train/test split by row.
# sample.split() expects the vector of outcome values and returns one logical
# flag per element. Passing the whole data frame produced one flag per column,
# which subset() then recycled over the rows.
sha_datos_permisos_positives.split <- sample.split(
  sha_datos_permisos_positives$positives,
  SplitRatio = 0.8
)
sha_datos_permisos_positives.train <- subset(
  sha_datos_permisos_positives,
  sha_datos_permisos_positives.split
)
sha_datos_permisos_positives.test_train <- subset(
  sha_datos_permisos_positives,
  !sha_datos_permisos_positives.split
)

# Regression tree on the training rows, with the same predictors as the linear
# model (`total` and `sha256` removed).
permisos.tree <- tree(positives ~ . - total - sha256, data = sha_datos_permisos_positives.train)
plot(permisos.tree)
text(permisos.tree, pretty = 0)

# Predictions for the held-out rows and cross-validation of the fitted tree.
permisos.predecir <- predict(permisos.tree, sha_datos_permisos_positives.test_train)
permisos.cv <- cv.tree(permisos.tree)
permisos.cv
## $size
## [1] 7 6 5 4 3 2 1
## 
## $dev
## [1] 2480.654 2468.179 2464.927 2403.940 2513.106 2617.395 2858.442
## 
## $k
## [1]      -Inf  36.60952  50.11179  65.81134 190.74220 212.85556 311.35262
## 
## $method
## [1] "deviance"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"
# Plot the cross-validation result of the tree.
plot(permisos.cv)

# Two rpart fits on the training rows with the same formula: one with method
# "class" and one with method "anova".
permisos.rpart.class <- rpart(
  positives ~ . - total - sha256,
  data = sha_datos_permisos_positives.train,
  method = "class"
)
permisos.rpart.anova <- rpart(
  positives ~ . - total - sha256,
  data = sha_datos_permisos_positives.train,
  method = "anova"
)
# Draw the "class" tree.
rpart.plot(permisos.rpart.class, extra = 100)
## Warning: All boxes will be white (the box.palette argument will be ignored) because
## the number of classes in the response 19 is greater than length(box.palette) 6.
## To silence this warning use box.palette=0 or trace=-1.

# Draw the "anova" tree.
rpart.plot(permisos.rpart.anova, extra = 100)

## Análisis de factores

Realizo un análisis de componentes principales (PCA) para intentar ver si con pocos factores se puede explicar el dataframe y trabajar con ellos para una visualización; sin embargo, para explicar un 90% de la varianza hacen falta 20 factores, y con 2 sólo se explica un 50%.

# Unscaled PCA over the permission columns only. The columns are selected by
# name and the number of kept components is derived from them (one less than
# the number of columns), instead of relying on the fixed 4:99 / 95 values.
columnas_pca <- setdiff(
  colnames(sha_datos_permisos_positives),
  c("sha256", "total", "positives")
)
pr_permisos <- PCA(
  sha_datos_permisos_positives[, columnas_pca, drop = FALSE],
  scale.unit = FALSE,
  ncp = max(1, length(columnas_pca) - 1),
  graph = TRUE
)

# List the result components available for the PCA.
get_pca(pr_permisos)
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
# List the PCA result components available for the variables.
get_pca_var(pr_permisos)
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
# List the PCA result components available for the individuals.
get_pca_ind(pr_permisos)
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Eigenvalue plot of the PCA.
fviz_eig(pr_permisos)

# NOTE(review): presumably draws the same scree plot as the call above - confirm
# whether both are needed.
fviz_screeplot(pr_permisos)

# Contribution of each variable, converted to an interactive plotly figure.
ggplotly(fviz_contrib(pr_permisos, choice = "var"))
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

# Bibliografía

https://www.alexejgossmann.com/benchmarking_r/

https://cran.r-project.org/web/packages/microbenchmark/microbenchmark.pdf

https://rpubs.com/rdelgado/405322

https://techvidvan.com/tutorials/decision-tree-in-r/#:~:text=Decision%20trees%20are%20a%20graphical,as%20well%20as%20classification%20problems.

https://www.guru99.com/r-decision-trees.html

https://plotly.com/r/ml-regression/

https://www.statmethods.net/stats/regression.html